/*******************************************************************************
* Long-term effects of weather-induced migration on urban labor and housing 
  markets
* Busso & Chauvin

* Purpose: Prepare dataset.

*******************************************************************************/

/* Check if ssaggregate is installed, install it if not */
cap which "ssaggregate"
if _rc == 111 {
  ssc install "ssaggregate" , replace all
}

/***************************************************************/
/* Environment */
/***************************************************************/ 
/* Geographic unities */
* Origin (rural)
global geoorig mca8010

* Rural/urban location identifier
global geodest mcarp8010
global geo mcarp8010

/* Periods */
global years 9110 0010
global years_housing 9110

/***************************************************************/
/* Endogenous variables, instruments, controls, fe dataset */
/***************************************************************/ 
tempfile rur_spei_0009
tempfile cities_panel
tempfile com_time
tempfile city_rur_mig_91_sh

****************************************************************
/* var city_rur_mig_91_sh  */
use mig_share_t_rur ${geoorig} ${geodest} using "${db_tmp}/od_matrices", clear
drop if missing(mig_share_t_rur)
rename mig_share_t_rur city_rur_mig_91_sh
save `city_rur_mig_91_sh', replace

****************************************************************
/* var rate_mig_0110_rural, city_shift_share */
use ${geodest} rate_mig_0110_rural shift_0009_dws_t_rur shift_0009_dws_t_rur_abs ///
shift_8190_dws_t_rur imf_sh isv_sh igv_sh sh_emp_col l_pop_all91 pop_growth8091 ///
pop_growth0010 year ///
using "${db_tmp}/${geo}_analysis_dataset" if year == 1991, clear

gen macroreg = floor(${geodest}/1000000)
tab macroreg, gen(city_macrofe_)
drop macroreg year
	
save `cities_panel', replace

/* var commuting time 2010 */
use ${geodest} comtime* year using "${db_tmp}/${geo}_analysis_dataset" ///
if year == 2010, clear
drop year

save `com_time', replace

****************************************************************
/* var rur_spei_0009   */

use ${geoorig} drought_dws0009 using "$db_tmp/drought_index_spei8010.dta", clear
save `rur_spei_0009', replace

****************************************************************
/* Merge all  */
use `city_rur_mig_91_sh', clear

merge m:1 ${geodest} using `cities_panel'
drop if ${geodest} == 1000000
assert _merge == 3
drop _merge

merge m:1 ${geodest} using `com_time'
drop if ${geodest} == 1000000
assert _merge == 3
drop _merge

merge m:1 ${geoorig} using `rur_spei_0009'
drop if _merge == 2
assert _merge == 3
drop _merge

/* labels */
label variable mcarp8010 "Destination cities identifier"
label variable mca8010 "Rural municipalities of origin of rural-urban migrants identifier"
label variable city_rur_mig_91_sh "Share of rural-urban migrants that arrived in the corresponding mcarp8010 in the 5 years preceding the 1991 census and that came from rural area mca8010"
label variable rate_mig_0110_rural "Rural immigration rate to the city (endogenous explanatory variable)"
label variable shift_0009_dws_t_rur "Shift-share shock for mcarp8010 that uses the 2000-2009 average SPEI and the 1991 shares"
label variable city_macrofe_1 "Macro region fixed effect 1 for mcarp8010"
label variable city_macrofe_2 "Macro region fixed effect 2 for mcarp8010"
label variable city_macrofe_3 "Macro region fixed effect 3 for mcarp8010"
label variable city_macrofe_4 "Macro region fixed effect 4 for mcarp8010"
label variable city_macrofe_5 "Macro region fixed effect 5 for mcarp8010"
label variable isv_sh "City-level services share in the city, 1991"
label variable igv_sh "City-level government share in the city, 1991"
label variable imf_sh "City-level manufacturing share in the city, 1991"
label variable sh_emp_col "City-level share of college-educated in employment, 1991"
label variable l_pop_all91 "Log of population, 1991"
label variable pop_growth8091 "Lagged population growth, 1980-1991"
label variable pop_growth0010 "Current population growth, 2000-2010"
label variable shift_8190_dws_t_rur "Shift-share dummy for 1981-1990, rural areas"
label variable shift_0009_dws_t_rur "Shift-share shock for mcarp8010 that uses the 2000-2009 average SPEI and the 1991 shares"
label variable shift_0009_dws_t_rur_abs "Shift-share shock for mcarp8010 that uses the 2000-2009 absolute value of SPEI and the 1991 shares"
label variable comtime_10 "City average commuting time, 2010"
label variable comtime_10_l "City average commuting time - precarious housing, 2010"
label variable comtime_10_m "City average commuting time - low-quality housing, 2010"
label variable comtime_10_h "City average commuting time - quality housing, 2010"

/* save */
save ${db_tmp}/borusyak_dataset.dta, replace

/***************************************************************/
/* Outcomes dataset */
/***************************************************************/ 
/* Extract city outcomes */ 
use $db_tmp/mcarp8010_analysis_dataset.dta, clear

foreach i in $years_housing {

	global outcomes_`i'_labor d_l_wageres2_r_`i' d_l_emp_r_`i' ///
	d_l_wageres2_r_isv_`i' d_l_emp_r_isv_`i' d_l_wageres2_r_imf_`i' ///
	d_l_emp_r_imf_`i' d_l_wageres2_r_nohs_`i' d_l_emp_r_nohs_`i' ///
	d_l_wageres2_r_hs_`i' d_l_emp_r_hs_`i' d_l_wageres2_g_`i' d_l_emp_g_`i' ///
	d_rate_part_r_`i' d_imf_sh_r_`i' d_isv_sh_r_`i' d_hs_r_`i' ///
	d_rate_inform_r_`i' d_rate_inform2_r_`i'

	global outcomes_`i'_housing d_l_rentres2_r_`i' d_l_houses_r_`i' d_l_rooms_r_`i' ///	
	d_l_rentres2_l_r_`i' d_l_houses_l_r_`i' d_l_rooms_l_r_`i' ///
	d_l_rentres2_m_r_`i' d_l_houses_m_r_`i' d_l_rooms_m_r_`i' ///
	d_l_rentres2_h_r_`i' d_l_houses_h_r_`i' d_l_rooms_h_r_`i' ///
	d_l_rentres2_g_`i' d_l_houses_g_`i' d_l_rooms_g_`i' d_own_all_`i' ///
	d_own_all_l_`i' d_own_all_m_`i' d_own_all_h_`i' d_own_residents_`i' ///
	d_own_residents_l_`i' d_own_residents_m_`i' d_own_residents_h_`i' ///
	d_l_rentres2_`i' d_l_houses_`i' d_l_rooms_`i' ///
	d_l_rentres2_l_`i' d_l_houses_l_`i' d_l_rooms_l_`i' ///
	d_l_rentres2_m_`i' d_l_houses_m_`i' d_l_rooms_m_`i' ///
	d_l_rentres2_h_`i' d_l_houses_h_`i' d_l_rooms_h_`i' ///	
	d_l_rentres2_l_g_`i' d_l_houses_l_g_`i' d_l_rooms_l_g_`i' ///
	d_l_rentres2_m_g_`i' d_l_houses_m_g_`i' d_l_rooms_m_g_`i' ///
	d_l_rentres2_h_g_`i' d_l_houses_h_g_`i' d_l_rooms_h_g_`i' 

}

global outcomes_0010_labor d_l_wageres2_r_0010 d_l_emp_r_0010 ///
d_l_wageres2_r_isv_0010 d_l_emp_r_isv_0010 d_l_wageres2_r_imf_0010 d_l_emp_r_imf_0010 ///
d_l_wageres2_r_nohs_0010 d_l_emp_r_nohs_0010 d_l_wageres2_r_hs_0010 d_l_emp_r_hs_0010 ///
d_l_wageres2_g_0010 d_l_emp_g_0010 

global cities_fe d_cityfe_l_wage_r_9110 d_cityfe_l_wage_r_imf_9110 ///
d_cityfe_l_wage_r_isv_9110 d_cityfe_l_wage_r_nohs_9110 d_cityfe_l_wage_r_hs_9110 ///
d_cityfe_l_wage_g_9110 d_cityfe_l_rent_r_9110 d_cityfe_l_rent_l_rr_9110 ///
d_cityfe_l_rent_m_rr_9110 d_cityfe_l_rent_h_rr_9110 d_cityfe_l_rent_g_9110

keep mcarp8010 year $outcomes_9110_labor $outcomes_9110_housing ///
$outcomes_0010_labor expulsion $cities_fe

/* labels */

/* save */
save $db_tmp/outcomes_cities.dta, replace

/***************************************************************/
/* Create city-level dataset */
/***************************************************************/ 
/* Open city-rural area dataset */
use "$db_tmp/borusyak_dataset.dta", clear 

/* Define set of variables to transform from city to rural-area level */
global cvar1 rate_mig_0110_rural // endogenous variables 
global cvar2 shift_0009_dws_t_rur shift_0009_dws_t_rur_abs shift_8190_dws_t_rur // instruments
global cvar3 imf_sh isv_sh igv_sh sh_emp_col pop_growth8091 l_pop_all91 pop_growth0010 comtime_10*  // controls
global cvar4 city_macrofe_1 city_macrofe_2 city_macrofe_3 city_macrofe_4 city_macrofe_5 // fixed effects

preserve
    keep mcarp8010 $cvar1 $cvar2 $cvar3 $cvar4 
    duplicates drop
	
    /* Merge with city-level outcomes */ 
    merge 1:m mcarp8010 using $db_tmp/outcomes_cities.dta
    drop _merge

    /* We will input missing within variable sets to keep samples constant within set */ 
    /* This is because some outcome variables have missing variables */ 
	foreach i in $years {
	 
		/*  Labor set 1 */ 
		egen aux = rowmiss(d_l_wageres2_r_`i' d_l_emp_r_`i')
		foreach var of varlist d_l_wageres2_r_`i' d_l_emp_r_`i' {
			replace `var' = . if aux > 0
		}
		drop aux

		/*  Labor set 2 */ 
		egen aux = rowmiss(d_l_wageres2_r_isv_`i' d_l_emp_r_isv_`i')
		foreach var of varlist d_l_wageres2_r_isv_`i' d_l_emp_r_isv_`i' {
			replace `var' = . if aux > 0
		}
		drop aux

		/*  Labor set 3 */ 
		egen aux = rowmiss(d_l_wageres2_r_imf_`i' d_l_emp_r_imf_`i')
		foreach var of varlist d_l_wageres2_r_imf_`i' d_l_emp_r_imf_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/*  Labor set 4 */ 
		egen aux = rowmiss(d_l_wageres2_r_nohs_`i' d_l_emp_r_nohs_`i')
		foreach var of varlist d_l_wageres2_r_nohs_`i' d_l_emp_r_nohs_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/*  Labor set 5 */ 
		egen aux = rowmiss(d_l_wageres2_r_hs_`i' d_l_emp_r_hs_`i')
		foreach var of varlist d_l_wageres2_r_hs_`i' d_l_emp_r_hs_`i' {
			replace `var' = . if aux > 0
		}
		drop aux	
		
		/*  Labor set 6 */ 
		egen aux = rowmiss(d_l_wageres2_g_`i' d_l_emp_g_`i')
		foreach var of varlist d_l_wageres2_g_`i' d_l_emp_g_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
	}
	
	foreach i in $years_housing {
		
		/* Housing set 1 */ 
		egen aux = rowmiss(d_l_rentres2_r_`i' d_l_houses_r_`i' d_l_rooms_r_`i')
		foreach var of varlist d_l_rentres2_r_`i' d_l_houses_r_`i' d_l_rooms_r_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/* Housing set 2 */ 
		egen aux = rowmiss(d_l_rentres2_l_r_`i' d_l_houses_l_r_`i' d_l_rooms_l_r_`i')
		foreach var of varlist d_l_rentres2_l_r_`i' d_l_houses_l_r_`i' d_l_rooms_l_r_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/* Housing set 3 */ 
		egen aux = rowmiss(d_l_rentres2_m_r_`i' d_l_houses_m_r_`i' d_l_rooms_m_r_`i')
		foreach var of varlist d_l_rentres2_m_r_`i' d_l_houses_m_r_`i' d_l_rooms_m_r_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/* Housing set 4 */ 
		egen aux = rowmiss(d_l_rentres2_h_r_`i' d_l_houses_h_r_`i' d_l_rooms_h_r_`i')
		foreach var of varlist d_l_rentres2_h_r_`i' d_l_houses_h_r_`i' d_l_rooms_h_r_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/* Housing set 5 */ 
		egen aux = rowmiss(d_l_rentres2_g_`i' d_l_houses_g_`i' d_l_rooms_g_`i')
		foreach var of varlist d_l_rentres2_g_`i' d_l_houses_g_`i' d_l_rooms_g_`i' {
			replace `var' = . if aux > 0
		}
		drop aux
		
		/* Housing set 6 */ 
		egen aux = rowmiss(d_l_rentres2_`i' d_l_houses_`i' d_l_rooms_`i')
		foreach var of varlist d_l_rentres2_`i' d_l_houses_`i' d_l_rooms_`i' {
			replace `var' = . if aux>0
		}
		drop aux
		
		/* Housing set 7 */ 
		egen aux = rowmiss(d_l_rentres2_l_`i' d_l_houses_l_`i' d_l_rooms_l_`i')
		foreach var of varlist d_l_rentres2_l_`i' d_l_houses_l_`i' d_l_rooms_l_`i' {
			replace `var' = . if aux>0
		}
		drop aux
		
		/* Housing set 8 */ 
		egen aux = rowmiss(d_l_rentres2_m_`i' d_l_houses_m_`i' d_l_rooms_m_`i')
		foreach var of varlist d_l_rentres2_m_`i' d_l_houses_m_`i' d_l_rooms_m_`i' {
			replace `var' = . if aux>0
		}
		drop aux
		
		/* Housing set 9 */ 
		egen aux = rowmiss(d_l_rentres2_h_`i' d_l_houses_h_`i' d_l_rooms_h_`i')
		foreach var of varlist d_l_rentres2_h_`i' d_l_houses_h_`i' d_l_rooms_h_`i' {
			replace `var' = . if aux>0
		}
		drop aux		
			
		/* Housing set 10 */ 
		egen aux = rowmiss(d_l_rentres2_l_g_`i' d_l_houses_l_g_`i' d_l_rooms_l_g_`i')
		foreach var of varlist d_l_rentres2_l_g_`i' d_l_houses_l_g_`i' d_l_rooms_l_g_`i' {
			replace `var' = . if aux>0
		}
		drop aux
		
		/* Housing set 11 */ 
		egen aux = rowmiss(d_l_rentres2_m_g_`i' d_l_houses_m_g_`i' d_l_rooms_m_g_`i')
		foreach var of varlist d_l_rentres2_m_g_`i' d_l_houses_m_g_`i' d_l_rooms_m_g_`i' {
			replace `var' = . if aux>0
		}
		drop aux
		
		/* Housing set 12 */ 
		egen aux = rowmiss(d_l_rentres2_h_g_`i' d_l_houses_h_g_`i' d_l_rooms_h_g_`i')
		foreach var of varlist d_l_rentres2_h_g_`i' d_l_houses_h_g_`i' d_l_rooms_h_g_`i' {
			replace `var' = . if aux>0
		}
		drop aux		
	}
    save $db_out/shift_share_db_cities.dta, replace
	
	rm "$db_tmp/outcomes_cities.dta"
	rm "$db_tmp/borusyak_dataset.dta"
	
restore

/***************************************************************/
/* Cluster variables */
/***************************************************************/
preserve 
	keep mca8010
	duplicates drop mca8010, force 
	merge 1:m mca8010 using "${db_inp}/geo_codes_sat_cities.dta", keepusing(mcm8010 mcme8010)
	keep if _merge == 3
	drop _merge
	duplicates drop mca8010 mcme8010 mcm8010, force
	save $db_out/cluster_vars.dta, replace
restore

/***************************************************************/
/* Create shares dataset */
/***************************************************************/ 
preserve
    keep mcarp8010 mca8010 city_rur_mig_91_sh 
    merge m:1 mca8010 using $db_out/cluster_vars
    keep if _merge == 3
    drop _merge
    save $db_out/shift_share_db_shares.dta, replace
restore

/***************************************************************/
/* Create rural area dataset */
/***************************************************************/ 
keep mca8010 drought_dws0009 
duplicates drop mca8010, force
save $db_out/shift_share_db_rural.dta, replace


